#
# Requires:
	# 1) AsunkaEtAl_replication.dta  
		# NOTE: this file is a subset of the full dataset collected, which is available here (Miriam Golden's Dataverse page): https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/25BDLU
	
	# 2) EC_data.dta
		# These are the official voter registration data provided by Ghana's Electoral Commission


##############################
#Load the required libraries
##############################

install.packages("ri", "RItools")
install.packages("dplyr")


library(foreign,quietly = T)
library(dplyr,quietly = T)
library(ri,quietly = T)
library(RItools)
library(xtable,quietly = T)
library(DataCombine,quietly = T)
library(xtable)

################
#Read in data
###############

# Set WD to folder with data
setwd("~/Dropbox/observer_paper/new_observer_paper/observer_paper_replication_file")

## input VIR data from Golden's dataverse
dat <-read.dta("AsunkaEtAl_replication.dta")
#names(dat)

## Input Electoral commission data voter registration data
ec <- read.dta("EC_data.dta")

## Merge EC data to VIR data
d_sub1 <- merge(dat, ec, by="pscode")
 names(d_sub1)

########################################
#####################################################
#Recode variables from character to integer

data=mutate(d_sub1,eovharass1=ifelse(eovharass1=="Yes",1,0),
               eovharass2=ifelse(eovharass2=="Yes",1,0), treatment=ifelse(observer=="Yes",1,0),
               competitive=ifelse(competition=="Competitive",1,0),
               urban=ifelse(stationdensity=="Rural",0,1))

# Create turnout measures
data$turnout1 <- (data$validvotes1 + data$rejectballot1)/data$totalvoters_ec
data$turnout2 <- (data$validvotes2 + data$rejectballot2)/data$totalvoters_ec

#Repeat values collected by CODEO observers (treated units) for second VIR surveys
data=mutate(data, eovharass2=ifelse(observer=="Yes",eovharass1,eovharass2),
               turnout2=ifelse(observer=="Yes",turnout1,turnout2)
      )

# Creating dependent variables using matched pairs (only use data where survey 1 and survey 2 report the same outcome value)
##--------
data=mutate(data,
             turnout=ifelse(turnout1==turnout2,turnout1,NA),
             eovharass=ifelse(eovharass1==eovharass2,eovharass1,NA)
             )

data=mutate(data,   
            block=as.numeric(factor(d_sub1$const)),
             # Create weight variable  
             #weighting: Inverse prob. weights. 
             #(Idea: give more weight to the unit that has a lower chance of being treated)
             # High treated weight= 1.25 (1/0.8)
             # High control weight= 5 (1/0.2)
             # Medium treated/control weight=2 (1/0.5)
             # Low treated weight = 3.33 (1/0.3)
             # Low control weight = 1.43 (1/0.7)
             weight_iv=ifelse(sat=="High"&treatment==1,1/0.8,
                              ifelse(sat=="High"&treatment==0,1/0.2,
                                     #ifelse(sat=="Middle",1/0.5,
                                            ifelse(sat=="Low"&treatment==1,1/0.3,
                                            	ifelse(sat=="Low"&treatment==0, 1/0.7,2)))))

## remove one extreme value of turnout

data <- data[data$turnout < 5,]


########################################################################
########################################################################
## Generate outlier measures of fraud

## outlier1 -- outlier defined as 1 if 2 SDs above mean in constituency, 0 otherwise

rob_dat<-select(data,const,turnout) #select constituency and turnout variables from main dataset

robdat_grp<-group_by(rob_dat,const) #group data by constituency to compute constituency level estimates

const_sum<-summarize(robdat_grp,const_to=mean(turnout, na.rm=T),sdv=sd(turnout,na.rm = T), iqr=IQR(turnout,na.rm = T), upqr=quantile(turnout, 0.75, na.rm = T)) #compute constituency level estimates
# Notes: const_to=Average turnout in constituency, sdv=Standard deviation of turnout in constituency, iqr=interquartile range of turnout, upqr=upper quartile of turnout distribution. 

const_sum<-mutate(const_sum,mean_ubto=const_to+1.96*sdv,med_ubto=upqr+1.5*iqr) # compute upper limits for turnout (i.e., 2 standard deviations away and upqr+1.5*iqr)

#Merge calculation into main dataset
data<-merge(data,const_sum,by="const", all.x = TRUE, all.y = FALSE)


data<-mutate(data,outlier1_2sd=ifelse(turnout>mean_ubto,1,0), outlier2_iqr=ifelse(turnout>med_ubto,1,0)) # generate alternate fraud measures based on turntout

#names(data)
table(data$outlier1_2sd)
table(data$outlier2_iqr)


 #######################################
##------------------------------
# Table 1: Summary Statistics 
##------------------------------

# Groups: Full sample, by treatement, and by level of competition 

#Full sample (fs):
fs_sum<-with(data, c(mean(eovharass,na.rm = T),sd(eovharass,na.rm = T), sum(!is.na(eovharass)),
					mean(turnout,na.rm = T),sd(turnout,na.rm = T), sum(!is.na(turnout)),
                      mean(outlier1_2sd,na.rm = T),sd(outlier1_2sd,na.rm = T), sum(!is.na(outlier1_2sd)),
                      mean(outlier2_iqr,na.rm = T),sd(outlier2_iqr,na.rm = T), sum(!is.na(outlier2_iqr))   
                     )
                 )


#summary by treatment
dat_grp<-group_by(data,treatment)

treatment_sum <-summarize(dat_grp,mn_eov=mean(eovharass,na.rm = T),sd_eov=sd(eovharass,na.rm = T), n_eov=sum(!is.na(eovharass)), 
						mn_to=mean(turnout,na.rm = T),sd_to=sd(turnout,na.rm = T), n_to=sum(!is.na(turnout)),
                      mn_o1= mean(outlier1_2sd,na.rm = T),sd_o1=sd(outlier1_2sd,na.rm = T), n_o1=sum(!is.na(outlier1_2sd)),
                      mn_o2=mean(outlier2_iqr,na.rm = T),sd_o2=sd(outlier2_iqr,na.rm = T), n_o2=sum(!is.na(outlier2_iqr))
					)

treatment_sum<-t(treatment_sum[1:2,-1])

#summary by level of competition
dat_grp<-group_by(data,competition)

comp_sum<-summarize(dat_grp,mn_eov=mean(eovharass,na.rm = T),sd_eov=sd(eovharass,na.rm = T), n_eov=sum(!is.na(eovharass)), 
						mn_to=mean(turnout,na.rm = T),sd_to=sd(turnout,na.rm = T), n_to=sum(!is.na(turnout)),
                      mn_o1= mean(outlier1_2sd,na.rm = T),sd_o1=sd(outlier1_2sd,na.rm = T), n_o1=sum(!is.na(outlier1_2sd)),
                      mn_o2=mean(outlier2_iqr,na.rm = T),sd_o2=sd(outlier2_iqr,na.rm = T), n_o2=sum(!is.na(outlier2_iqr))
					)
					
					
					
					
comp_sum<-t(comp_sum[1:2,-1])

#create summary Table 1
sum_tab=cbind(fs_sum,treatment_sum,comp_sum)


colnames(sum_tab)<-c("Full Sample", "Control", "Treatment", "Non-Competitive", "Competitive")
rownames(sum_tab)<-c( "Intimidation during voting", "SD_v", "N_v", "Turnout","SD_to", "N_to","Outlier (2SDs)","SD_to1", "N_to1", "Outlier (IQR)", "SD_to2", "N_to2")

sum_tab <- round(sum_tab, 3)

##

## Produce summary Table in LaTex format 
# some reformatting done by hand for publication
xtable(sum_tab)



########################################################################
########################################################################
########################################################################

## Figure 1 -- ATEs for fraud and intimidatoin

#Note: Randomization inference is used to generate the 95 percent confidence interval round the estimated ATEs


ps_dat=select(data,block,weight_iv,treatment,turnout,eovharass)

#1. Turnout estimates
todat=na.omit(select(ps_dat,-eovharass))
dim(todat)   

set.seed(8968668)
perms <- genperms(todat$treatment, clustvar =todat$block) # all possible permutations

probs <- todat$weight_iv

ate=estate(Y = todat$turnout, Z =todat$treatment,prob = probs)
Ys <- genouts(todat$turnout,todat$treatment,ate=ate) 
distout <- gendist(Ys,perms,prob=probs) 
sampdist<-dispdist(distout,ate) 

turnout<-ate
turnout_se<-sampdist$sd
turnout_ul<-sampdist$quantile[2]
turnout_ll<-sampdist$quantile[1]            


###########
# Intimidation
pvdat=na.omit(select(ps_dat,-turnout))
dim(pvdat)

set.seed(857580)
perms <- genperms(pvdat$treatment, clustvar =pvdat$block) # all possible permutations

probs <- pvdat$weight_iv

ate=estate(Y = pvdat$eovharass, Z =pvdat$treatment,prob = probs)
Ys <- genouts(pvdat$eovharass,pvdat$treatment,ate=ate) 
distout <- gendist(Ys,perms,prob=probs) 
sampdist<-dispdist(distout,ate)  

eovharass<-ate
eovharass_se<-sampdist$sd
eovharass_ul<-sampdist$quantile[2]
eovharass_ll<-sampdist$quantile[1]

######################
## outliers


ps_dat=select(data,block,weight_iv,treatment,outlier1_2sd,outlier2_iqr)

# 2 SDS
todat=na.omit(select(ps_dat,-outlier2_iqr))
dim(todat)   

set.seed(8968668)
perms <- genperms(todat$treatment, clustvar =todat$block) # all possible permutations

probs <- todat$weight_iv

ate=estate(Y = todat$outlier1_2sd, Z =todat$treatment,prob = probs)
Ys <- genouts(todat$outlier1_2sd,todat$treatment,ate=ate) 
distout <- gendist(Ys,perms,prob=probs) 
sampdist<-dispdist(distout,ate) 

out1<-ate
out1_se<-sampdist$sd
out1_ul<-sampdist$quantile[2]
out1_ll<-sampdist$quantile[1]        


# IQR
todat=na.omit(select(ps_dat,-outlier1_2sd))
dim(todat)   
names(todat)
set.seed(8968668)
perms <- genperms(todat$treatment, clustvar =todat$block) # all possible permutations

probs <- todat$weight_iv

ate=estate(Y = todat$outlier2_iqr, Z =todat$treatment,prob = probs)
Ys <- genouts(todat$outlier2_iqr,todat$treatment,ate=ate) 
distout <- gendist(Ys,perms,prob=probs) 
sampdist<-dispdist(distout,ate) 

out2<-ate
out2_se<-sampdist$sd
out2_ul<-sampdist$quantile[2]
out2_ll<-sampdist$quantile[1]    

######
## create a plot


ate<-c(eovharass, turnout,out1, out2 )
se<-c(eovharass_se, turnout_se,out1_se, out2_se )
ll<-c(eovharass_ll, turnout_ll, out1_ll, out2_ll )
ul<-c(eovharass_ul, turnout_ul,out1_ul, out2_ul )

dat<-rbind(ate,se,ll,ul)
colnames(dat)<-c("eovharass", "turnout","outlier (2SDs)", "outlier (IQR)")

step<-1:4

main<-c(dat[1,])
ll<-c(dat[3,])
ul<-c(dat[4,])


par(mfrow=c(1, 1))
plot(step,main, axes = F,ylim=c(-0.15, 0.15),xlim=c(0.5,4.5), ylab="Average Treatment Effect", xlab="", pch=19, las=1, main="")
grid()
axis(at = c(1,2, 3, 4), labels = c("Intimidation", "Turnout", "Outlier (2SDs)","Outlier (IQR)" ), side = 1, cex.axis=0.9)
axis(side = 2,las=1)
segments(step,ll, step, ul)
abline(h=0,lty=2)



#######################################################
############################################################
########################################################
##
## Table 2


## Calculate condition mean, ITTs, ASNT, TCE  
##############
indicators<-c("eovharass", "turnout", "outlier1_2sd","outlier2_iqr") 
#dat$sat
results_list<-{}

for (j in 1:length(indicators)){
  #Select relavant variables for analysis
  fraud=data[,c(indicators[j])]
  treat=data[,c("treatment")]
  weight=data[,c("weight_iv")]
  sat=data[,c("sat")]
  p<-c("Low","Middle","High")
  #Create a temporary dataset to omit missing cases 
  temp<-na.omit(data.frame(fraud=fraud,treat=treat,weight=weight,sat=sat))
  #print(dim(temp))
  
  #select variables from temporary dataset
  fraud=temp$fraud; treat=temp$treat; weight=temp$weight; sat=temp$sat
  #create emptry matrix to store estimates
  result=matrix(NA,nrow=10,ncol=3)
  #Loop over saturation levels computing each estimate:Conditional means, ITT,TCE, ASNT
  for(i in 1:3){
    #Calculate weight mean and S.error for treated units
    result[1,i]<-weighted.mean(fraud[treat==1&sat==p[i]],w =weight[treat==1&sat==p[i]])
    result[2,i]=sd(fraud[treat==1&sat==p[i]])/sqrt(length(fraud[treat==1&sat==p[i]]))
    #Calculate weight mean and S.error for control units
    result[3,i]=weighted.mean(fraud[treat==0&sat==p[i]],w = weight[treat==0&sat==p[i]])
    result[4,i]=sd(fraud[treat==0&sat==p[i]])/sqrt(length(fraud[treat==0&sat==p[i]])) 
    #Calculate ITT and S Error: (Mean treated in sat - Mean control in low sat.) 
    result[5,i] = (weighted.mean(fraud[treat==1&sat==p[i]],w =weight[treat==1&sat==p[i]])) - (weighted.mean(fraud[treat==0&sat=="Low"], w = weight[treat==0&sat=="Low"]))
    result[6,i] = sqrt( ((sd(fraud[treat==1&sat==p[i]])/sqrt(length(fraud[treat==1&sat==p[i]])))^2) + ((sd(fraud[treat==0&sat=="Low"])/sqrt(length(fraud[treat==0&sat=="Low"])))^2))
    #Calculate TCE and S. Error: Mean across T+C in sat. level - Mean control in low sat.    
    result[7,i] = (weighted.mean(fraud[sat==p[i]],w =weight[sat==p[i]])) - (weighted.mean(fraud[treat==0&sat=="Low"], w = weight[treat==0&sat=="Low"]))
    result[8,i] = sqrt( ((sd(fraud[sat==p[i]])/sqrt(length(fraud[sat==p[i]])))^2) + ((sd(fraud[treat==0&sat=="Low"])/sqrt(length(fraud[treat==0&sat=="Low"])))^2))
    #Calculate ASNT and S. Error: Mean control in sat - Mean control in low. sat. 
    result[9,i] = (weighted.mean(fraud[treat==0&sat==p[i]],w =weight[treat==0&sat==p[i]])) - (weighted.mean(fraud[treat==0&sat=="Low"], w = weight[treat==0&sat=="Low"]))
    result[10,i] = sqrt( ((sd(fraud[treat==0&sat==p[i]])/sqrt(length(fraud[treat==0&sat==p[i]])))^2) + ((sd(fraud[treat==0&sat=="Low"])/sqrt(length(fraud[treat==0&sat=="Low"])))^2))
    #Column names 
    colnames(result)<-c("Low","Middle", "High")     
    rownames(result)<-c("treated", "se_treated", "ctrl", "se_ctrl", "itt_diff",
                        "se_itt","tce","se_tce","asnt","se_asnt")
    results_list[[indicators[j]]]<-result
  }  
}

sat_summ<-rbind(results_list[[1]],results_list[[2]], results_list[[3]], results_list[[4]])
tab2<-round(sat_summ[-c(7:10,17:20, 27:30, 37:40),], 3)
tab2
rownames(tab2)<-c("Treated Intimidation", "se_t_v",
                  "Control Intimidation", "se_c_v","ITT(s)_v","se_itt_v",
                  "Treated turnout", "se_t_to", "Control_turnout",
                  "se_c_to","ITT(s)_to","se_itt_to",
                  "Treated outlier (sd)", "se_t_toosd", "Control outlier (sd)",
                  "se_c_osd","ITT(s)_osd","se_itt_osd",
                  "Treated outlier (iqr)", "se_t_toiqr", "Control outlier (iqr)",
                  "se_c_oiqr","ITT(s)_oiqr","se_itt_oiqr")

## Table 2                  
tab2 <- round(tab2, 3)

## Produce summary Table in LaTex format 
# some reformatting done by hand for publication
xtable(tab2)



##########################################################################################
########################################################################################

## Figure 2

#######################################
####
#### ASNT


indicators<-c("eovharass", "turnout","outlier1_2sd", "outlier2_iqr" ) 

comp_results<-{}

comp<-c("Competitive","Stronghold")
## Loop over Comp. / Non-Comp. 
for(g in 1:2){
  subset<-filter(data,competition==comp[g])
  results_list<-{}
  ## Loop over Indicators (same as above)
  for (j in 1:length(indicators)){ 
    fraud=subset[,c(indicators[j])]
    treat=subset[,c("treatment")]
    weight=subset[,c("weight_iv")]
    sat=subset[,c("sat")]
    p<-c("Low","Middle","High")
    temp<-na.omit(data.frame(fraud=fraud,treat=treat,weight=weight,sat=sat))
    fraud=temp$fraud;treat=temp$treat;weight=temp$weight; sat=temp$sat
    result=matrix(NA,nrow=10,ncol=3)
    
    for(i in 1:3){
      result[1,i]<-weighted.mean(fraud[treat==1&sat==p[i]],w =weight[treat==1&sat==p[i]])
      result[2,i]=sd(fraud[treat==1&sat==p[i]])/sqrt(length(fraud[treat==1&sat==p[i]]))
      result[3,i]=weighted.mean(fraud[treat==0&sat==p[i]],w = weight[treat==0&sat==p[i]])
      result[4,i]=sd(fraud[treat==0&sat==p[i]])/sqrt(length(fraud[treat==0&sat==p[i]])) 
      result[5,i] = (weighted.mean(fraud[treat==1&sat==p[i]],w =weight[treat==1&sat==p[i]])) - (weighted.mean(fraud[treat==0&sat=="Low"], w = weight[treat==0&sat=="Low"]))
      result[6,i] = sqrt( ((sd(fraud[treat==1&sat==p[i]])/sqrt(length(fraud[treat==1&sat==p[i]])))^2) + ((sd(fraud[treat==0&sat=="Low"])/sqrt(length(fraud[treat==0&sat=="Low"])))^2))
      
      result[7,i] = (weighted.mean(fraud[sat==p[i]],w =weight[sat==p[i]])) - (weighted.mean(fraud[treat==0&sat=="Low"], w = weight[treat==0&sat=="Low"]))
      result[8,i] = sqrt( ((sd(fraud[sat==p[i]])/sqrt(length(fraud[sat==p[i]])))^2) + ((sd(fraud[treat==0&sat=="Low"])/sqrt(length(fraud[treat==0&sat=="Low"])))^2))
      
      result[9,i] = (weighted.mean(fraud[treat==0&sat==p[i]],w =weight[treat==0&sat==p[i]])) - (weighted.mean(fraud[treat==0&sat=="Low"], w = weight[treat==0&sat=="Low"]))
      result[10,i] = sqrt( ((sd(fraud[treat==0&sat==p[i]])/sqrt(length(fraud[treat==0&sat==p[i]])))^2) + ((sd(fraud[treat==0&sat=="Low"])/sqrt(length(fraud[treat==0&sat=="Low"])))^2))
      
      colnames(result)<-c("Low","Middle", "High")     
      rownames(result)<-c("treated", "se_treated", "ctrl", "se_ctrl", "itt_diff", "se_itt","tce","se_tce","asnt","se_asnt")
    }
    results_list[[indicators[j]]]<-result
  }  
  comp_results[[comp[g]]]<-results_list
}


#row bind results for competitive and strongholds. Then column bind results
comp_h<-rbind(comp_results[[1]][[1]],comp_results[[1]][[2]], comp_results[[1]][[3]], comp_results[[1]][[4]])
comp_l<-rbind(comp_results[[2]][[1]],comp_results[[2]][[2]], comp_results[[2]][[3]], comp_results[[2]][[4]])

sat_comp_results<-cbind(comp_h,comp_l)
sat_comp_results

## create table for the appendix

appC <- round(sat_comp_results, 3)
xtable(appC)



### Pull out the ASNT results to produce Figure 2

########
#pdf("~/Dropbox/observer_paper/new_observer_paper/figures/spillover_effects.pdf",width=7,height=5)

# Figure 2


par(mfrow=c(1,2))
pos=1:2

#Intimidation during voting
est_comp=c(sat_comp_results[9,2],sat_comp_results[9,3])
se_comp=c(sat_comp_results[10,2],sat_comp_results[10,3])
est_strh=c(sat_comp_results[9,5],sat_comp_results[9,6])
se_strh=c(sat_comp_results[10,5],sat_comp_results[10,6])

plot(pos,est_comp,axes = F,ylim=c(-0.25, 0.4), ylab="ASNT(s)", xlab=" ", main="", pch=19, las=1)

title(main = list("Competitive constituencies\n\n Intimidation", cex = 0.8))

grid()
axis(at = c(1,2), labels = c("Medium", "High"), side = 1, cex.axis=0.9)
axis(side = 2,las=1)
segments(pos,est_comp+se_comp*1.96, pos, est_comp-se_comp*1.96)
abline(h=0,lty=2)

plot(pos,est_strh,axes = F,ylim=c(-0.25, 0.4), ylab="", xlab=" ", main=" ", pch=19, las=1)
title(main = list("Non-competitive constituencies \n \n Intimidation", cex = 0.8))

grid()
axis(at = c(1,2), labels = c("Medium", "High"), side = 1, cex.axis=0.9)
axis(side = 2,las=1)
segments(pos,est_strh+se_strh*1.96, pos, est_strh-se_strh*1.96)
abline(h=0,lty=2)


###########################################################
# Figure 3


par(mfrow=c(3,2))
ylim <-c(-.2, .1)

#Turnout
est_comp=c(sat_comp_results[19,2],sat_comp_results[19,3])
se_comp=c(sat_comp_results[20,2],sat_comp_results[20,3])
est_strh=c(sat_comp_results[19,5],sat_comp_results[19,6])
se_strh=c(sat_comp_results[20,5],sat_comp_results[20,6])


plot(pos,est_comp,axes = F,ylim=ylim, ylab="ASNT(s)", xlab="Observer saturation level", main="", pch=19, las=1)

title(main = list("Competitive constituencies\n\n Turnout", cex = 0.8))

grid()
axis(at = c(1,2), labels = c("Medium", "High"), side = 1, cex.axis=0.9)
axis(side = 2,las=1, at=seq(-.2,.1,by=.1))
segments(pos,est_comp+se_comp*1.96, pos, est_comp-se_comp*1.96)
abline(h=0,lty=2)

plot(pos,est_strh,axes = F,ylim=ylim, ylab=" ", xlab="Observer saturation level", main=" ", pch=19, las=1)
title(main = list("Non-competitive constituencies \n \n Turnout", cex = 0.8))

grid()
axis(at = c(1,2), labels = c("Medium", "High"), side = 1, cex.axis=0.9)
axis(side = 2,las=1, at=seq(-.2,.1,by=.1))
segments(pos,est_strh+se_strh*1.96, pos, est_strh-se_strh*1.96)
abline(h=0,lty=2)



#Outlier 2SDS
est_comp=c(sat_comp_results[29,2],sat_comp_results[29,3])
se_comp=c(sat_comp_results[30,2],sat_comp_results[30,3])
est_strh=c(sat_comp_results[29,5],sat_comp_results[29,6])
se_strh=c(sat_comp_results[30,5],sat_comp_results[30,6])


plot(pos,est_comp,axes = F,ylim=ylim, ylab="ASNT(s)", xlab="", main="", pch=19, las=1)
#title(main = list("Competitive constituencies\n\n Turnout", cex = 0.8))

title(main = list("Outlier (2SDs)", cex = 0.8))
#title(main = list("Outlier (2SDs)", cex = 0.8))

grid()
axis(at = c(1,2), labels = c("Medium", "High"), side = 1, cex.axis=0.9)
axis(side = 2,las=1, at=seq(-.2,.1,by=.1))
segments(pos,est_comp+se_comp*1.96, pos, est_comp-se_comp*1.96)
abline(h=0,lty=2)

plot(pos,est_strh,axes = F,ylim=ylim, ylab=" ", xlab="", main=" ", pch=19, las=1)
title(main = list("Outlier (2SDs)", cex = 0.8))
#title(main = list("Outlier (2SDs)", cex = 0.8))

grid()
axis(at = c(1,2), labels = c("Medium", "High"), side = 1, cex.axis=0.9)
axis(side = 2,las=1, at=seq(-.2,.1,by=.1))
segments(pos,est_strh+se_strh*1.96, pos, est_strh-se_strh*1.96)
abline(h=0,lty=2)





#Outlier IQR
est_comp=c(sat_comp_results[39,2],sat_comp_results[39,3])
se_comp=c(sat_comp_results[40,2],sat_comp_results[40,3])
est_strh=c(sat_comp_results[39,5],sat_comp_results[39,6])
se_strh=c(sat_comp_results[40,5],sat_comp_results[40,6])


plot(pos,est_comp,axes = F,ylim=ylim, ylab="ASNT(s)", xlab="Observer saturation level", main="", pch=19, las=1)

title(main = list("Outlier (IQR)", cex = 0.8))

grid()
axis(at = c(1,2), labels = c("Medium", "High"), side = 1, cex.axis=0.9)
axis(side = 2,las=1, at=seq(-.2,.1,by=.1))
segments(pos,est_comp+se_comp*1.96, pos, est_comp-se_comp*1.96)
abline(h=0,lty=2)

plot(pos,est_strh,axes = F,ylim=ylim, ylab=" ", xlab="Observer saturation level", main=" ", pch=19, las=1)
title(main = list("Outlier (IQR)", cex = 0.8))

grid()
axis(at = c(1,2), labels = c("Medium", "High"), side = 1, cex.axis=0.9)
axis(side = 2,las=1, at=seq(-.2,.1,by=.1))
segments(pos,est_strh+se_strh*1.96, pos, est_strh-se_strh*1.96)
abline(h=0,lty=2)


########################################################################################
########################################################################################
########################################################################################

########################################################################
########################################################################
########################################################################

## Replicate Figure 1, progressively dropping high values of turnout


data_analysis <- data[data$turnout < 3,]

## Re- Generate outlier measures of fraud 
## outlier1 -- outlier defined as 1 if 2 SDs above mean in constituency, 0 otherwise
rob_dat<-select(data_analysis,const,turnout) #select constituency and turnout variables from main dataset
robdat_grp<-group_by(rob_dat,const) #group data by constituency to compute constituency level estimates
const_sum<-summarize(robdat_grp,const_to=mean(turnout, na.rm=T),sdv=sd(turnout,na.rm = T), iqr=IQR(turnout,na.rm = T), upqr=quantile(turnout, 0.75, na.rm = T)) #compute constituency level estimates
# Notes: const_to=Average turnout in constituency, sdv=Standard deviation of turnout in constituency, iqr=interquartile range of turnout, upqr=upper quartile of turnout distribution. 
const_sum<-mutate(const_sum,mean_ubto=const_to+1.96*sdv,med_ubto=upqr+1.5*iqr) # compute upper limits for turnout (i.e., 2 standard deviations away and upqr+1.5*iqr)
#Merge calculation into main dataset
data_analysis<-merge(data_analysis,const_sum,by="const", all.x = TRUE, all.y = FALSE)
data_analysis<-mutate(data_analysis,outlier1_2sd=ifelse(turnout>mean_ubto,1,0), outlier2_iqr=ifelse(turnout>med_ubto,1,0)) # generate alternate fraud measures based on turntout

# Select out the measures
ps_dat=select(data_analysis,block,weight_iv,treatment,turnout,eovharass)

#1. Turnout estimates
todat=na.omit(select(ps_dat,-eovharass))
dim(todat)   

set.seed(8968668)
perms <- genperms(todat$treatment, clustvar =todat$block) # all possible permutations

probs <- todat$weight_iv

ate=estate(Y = todat$turnout, Z =todat$treatment,prob = probs)
Ys <- genouts(todat$turnout,todat$treatment,ate=ate) 
distout <- gendist(Ys,perms,prob=probs) 
sampdist<-dispdist(distout,ate) 

turnout<-ate
turnout_se<-sampdist$sd
turnout_ul<-sampdist$quantile[2]
turnout_ll<-sampdist$quantile[1]            


###########
# Intimidation
pvdat=na.omit(select(ps_dat,-turnout))
dim(pvdat)

set.seed(857580)
perms <- genperms(pvdat$treatment, clustvar =pvdat$block) # all possible permutations

probs <- pvdat$weight_iv

ate=estate(Y = pvdat$eovharass, Z =pvdat$treatment,prob = probs)
Ys <- genouts(pvdat$eovharass,pvdat$treatment,ate=ate) 
distout <- gendist(Ys,perms,prob=probs) 
sampdist<-dispdist(distout,ate)  

eovharass<-ate
eovharass_se<-sampdist$sd
eovharass_ul<-sampdist$quantile[2]
eovharass_ll<-sampdist$quantile[1]

######################
## outliers
ps_dat=select(data_analysis,block,weight_iv,treatment,outlier1_2sd,outlier2_iqr)

# 2 SDS
todat=na.omit(select(ps_dat,-outlier2_iqr))
dim(todat)   

set.seed(8968668)
perms <- genperms(todat$treatment, clustvar =todat$block) # all possible permutations

probs <- todat$weight_iv

ate=estate(Y = todat$outlier1_2sd, Z =todat$treatment,prob = probs)
Ys <- genouts(todat$outlier1_2sd,todat$treatment,ate=ate) 
distout <- gendist(Ys,perms,prob=probs) 
sampdist<-dispdist(distout,ate) 

out1<-ate
out1_se<-sampdist$sd
out1_ul<-sampdist$quantile[2]
out1_ll<-sampdist$quantile[1]        


# IQR
todat=na.omit(select(ps_dat,-outlier1_2sd))
dim(todat)   
names(todat)
set.seed(8968668)
perms <- genperms(todat$treatment, clustvar =todat$block) # all possible permutations

probs <- todat$weight_iv

ate=estate(Y = todat$outlier2_iqr, Z =todat$treatment,prob = probs)
Ys <- genouts(todat$outlier2_iqr,todat$treatment,ate=ate) 
distout <- gendist(Ys,perms,prob=probs) 
sampdist<-dispdist(distout,ate) 

out2<-ate
out2_se<-sampdist$sd
out2_ul<-sampdist$quantile[2]
out2_ll<-sampdist$quantile[1]    

######
## create a plot


ate<-c(eovharass, turnout,out1, out2 )
se<-c(eovharass_se, turnout_se,out1_se, out2_se )
ll<-c(eovharass_ll, turnout_ll, out1_ll, out2_ll )
ul<-c(eovharass_ul, turnout_ul,out1_ul, out2_ul )

dat<-rbind(ate,se,ll,ul)
colnames(dat)<-c("eovharass", "turnout","outlier (2SDs)", "outlier (IQR)")

step<-1:4

main_3<-c(dat[1,])
ll_3<-c(dat[3,])
ul_3<-c(dat[4,])



#-----------------
data_analysis <- data[data$turnout < 2,]

## Re- Generate outlier measures of fraud 
## outlier1 -- outlier defined as 1 if 2 SDs above mean in constituency, 0 otherwise
rob_dat<-select(data_analysis,const,turnout) #select constituency and turnout variables from main dataset
robdat_grp<-group_by(rob_dat,const) #group data by constituency to compute constituency level estimates
const_sum<-summarize(robdat_grp,const_to=mean(turnout, na.rm=T),sdv=sd(turnout,na.rm = T), iqr=IQR(turnout,na.rm = T), upqr=quantile(turnout, 0.75, na.rm = T)) #compute constituency level estimates
# Notes: const_to=Average turnout in constituency, sdv=Standard deviation of turnout in constituency, iqr=interquartile range of turnout, upqr=upper quartile of turnout distribution. 
const_sum<-mutate(const_sum,mean_ubto=const_to+1.96*sdv,med_ubto=upqr+1.5*iqr) # compute upper limits for turnout (i.e., 2 standard deviations away and upqr+1.5*iqr)
#Merge calculation into main dataset
data_analysis<-merge(data_analysis,const_sum,by="const", all.x = TRUE, all.y = FALSE)
data_analysis<-mutate(data_analysis,outlier1_2sd=ifelse(turnout>mean_ubto,1,0), outlier2_iqr=ifelse(turnout>med_ubto,1,0)) # generate alternate fraud measures based on turntout

# Select out the measures
ps_dat=select(data_analysis,block,weight_iv,treatment,turnout,eovharass)

#1. Turnout estimates
todat=na.omit(select(ps_dat,-eovharass))
dim(todat)   

set.seed(8968668)
perms <- genperms(todat$treatment, clustvar =todat$block) # all possible permutations

probs <- todat$weight_iv

ate=estate(Y = todat$turnout, Z =todat$treatment,prob = probs)
Ys <- genouts(todat$turnout,todat$treatment,ate=ate) 
distout <- gendist(Ys,perms,prob=probs) 
sampdist<-dispdist(distout,ate) 

turnout<-ate
turnout_se<-sampdist$sd
turnout_ul<-sampdist$quantile[2]
turnout_ll<-sampdist$quantile[1]            


###########
# Intimidation
pvdat=na.omit(select(ps_dat,-turnout))
dim(pvdat)

set.seed(857580)
perms <- genperms(pvdat$treatment, clustvar =pvdat$block) # all possible permutations

probs <- pvdat$weight_iv

ate=estate(Y = pvdat$eovharass, Z =pvdat$treatment,prob = probs)
Ys <- genouts(pvdat$eovharass,pvdat$treatment,ate=ate) 
distout <- gendist(Ys,perms,prob=probs) 
sampdist<-dispdist(distout,ate)  

eovharass<-ate
eovharass_se<-sampdist$sd
eovharass_ul<-sampdist$quantile[2]
eovharass_ll<-sampdist$quantile[1]

######################
## outliers
ps_dat=select(data_analysis,block,weight_iv,treatment,outlier1_2sd,outlier2_iqr)

# 2 SDS
todat=na.omit(select(ps_dat,-outlier2_iqr))
dim(todat)   

set.seed(8968668)
perms <- genperms(todat$treatment, clustvar =todat$block) # all possible permutations

probs <- todat$weight_iv

ate=estate(Y = todat$outlier1_2sd, Z =todat$treatment,prob = probs)
Ys <- genouts(todat$outlier1_2sd,todat$treatment,ate=ate) 
distout <- gendist(Ys,perms,prob=probs) 
sampdist<-dispdist(distout,ate) 

out1<-ate
out1_se<-sampdist$sd
out1_ul<-sampdist$quantile[2]
out1_ll<-sampdist$quantile[1]        


# IQR
todat=na.omit(select(ps_dat,-outlier1_2sd))
dim(todat)   
names(todat)
set.seed(8968668)
perms <- genperms(todat$treatment, clustvar =todat$block) # all possible permutations

probs <- todat$weight_iv

ate=estate(Y = todat$outlier2_iqr, Z =todat$treatment,prob = probs)
Ys <- genouts(todat$outlier2_iqr,todat$treatment,ate=ate) 
distout <- gendist(Ys,perms,prob=probs) 
sampdist<-dispdist(distout,ate) 

out2<-ate
out2_se<-sampdist$sd
out2_ul<-sampdist$quantile[2]
out2_ll<-sampdist$quantile[1]    

######
## create a plot


ate<-c(eovharass, turnout,out1, out2 )
se<-c(eovharass_se, turnout_se,out1_se, out2_se )
ll<-c(eovharass_ll, turnout_ll, out1_ll, out2_ll )
ul<-c(eovharass_ul, turnout_ul,out1_ul, out2_ul )

dat<-rbind(ate,se,ll,ul)
colnames(dat)<-c("eovharass", "turnout","outlier (2SDs)", "outlier (IQR)")

step<-1:4

main_2<-c(dat[1,])
ll_2<-c(dat[3,])
ul_2<-c(dat[4,])

#--------------------------------

#-----------------
data_analysis <- data[data$turnout < 1.5,]

## Re- Generate outlier measures of fraud 
## outlier1 -- outlier defined as 1 if 2 SDs above mean in constituency, 0 otherwise
rob_dat<-select(data_analysis,const,turnout) #select constituency and turnout variables from main dataset
robdat_grp<-group_by(rob_dat,const) #group data by constituency to compute constituency level estimates
const_sum<-summarize(robdat_grp,const_to=mean(turnout, na.rm=T),sdv=sd(turnout,na.rm = T), iqr=IQR(turnout,na.rm = T), upqr=quantile(turnout, 0.75, na.rm = T)) #compute constituency level estimates
# Notes: const_to=Average turnout in constituency, sdv=Standard deviation of turnout in constituency, iqr=interquartile range of turnout, upqr=upper quartile of turnout distribution. 
const_sum<-mutate(const_sum,mean_ubto=const_to+1.96*sdv,med_ubto=upqr+1.5*iqr) # compute upper limits for turnout (i.e., 2 standard deviations away and upqr+1.5*iqr)
#Merge calculation into main dataset
data_analysis<-merge(data_analysis,const_sum,by="const", all.x = TRUE, all.y = FALSE)
data_analysis<-mutate(data_analysis,outlier1_2sd=ifelse(turnout>mean_ubto,1,0), outlier2_iqr=ifelse(turnout>med_ubto,1,0)) # generate alternate fraud measures based on turntout

# Select out the measures
ps_dat=select(data_analysis,block,weight_iv,treatment,turnout,eovharass)

#1. Turnout estimates
todat=na.omit(select(ps_dat,-eovharass))
dim(todat)   

set.seed(8968668)
perms <- genperms(todat$treatment, clustvar =todat$block) # all possible permutations

probs <- todat$weight_iv

ate=estate(Y = todat$turnout, Z =todat$treatment,prob = probs)
Ys <- genouts(todat$turnout,todat$treatment,ate=ate) 
distout <- gendist(Ys,perms,prob=probs) 
sampdist<-dispdist(distout,ate) 

turnout<-ate
turnout_se<-sampdist$sd
turnout_ul<-sampdist$quantile[2]
turnout_ll<-sampdist$quantile[1]            


###########
# Intimidation
pvdat=na.omit(select(ps_dat,-turnout))
dim(pvdat)

set.seed(857580)
perms <- genperms(pvdat$treatment, clustvar =pvdat$block) # all possible permutations

probs <- pvdat$weight_iv

ate=estate(Y = pvdat$eovharass, Z =pvdat$treatment,prob = probs)
Ys <- genouts(pvdat$eovharass,pvdat$treatment,ate=ate) 
distout <- gendist(Ys,perms,prob=probs) 
sampdist<-dispdist(distout,ate)  

eovharass<-ate
eovharass_se<-sampdist$sd
eovharass_ul<-sampdist$quantile[2]
eovharass_ll<-sampdist$quantile[1]

######################
## outliers
ps_dat=select(data_analysis,block,weight_iv,treatment,outlier1_2sd,outlier2_iqr)

# 2 SDS
todat=na.omit(select(ps_dat,-outlier2_iqr))
dim(todat)   

set.seed(8968668)
perms <- genperms(todat$treatment, clustvar =todat$block) # all possible permutations

probs <- todat$weight_iv

ate=estate(Y = todat$outlier1_2sd, Z =todat$treatment,prob = probs)
Ys <- genouts(todat$outlier1_2sd,todat$treatment,ate=ate) 
distout <- gendist(Ys,perms,prob=probs) 
sampdist<-dispdist(distout,ate) 

out1<-ate
out1_se<-sampdist$sd
out1_ul<-sampdist$quantile[2]
out1_ll<-sampdist$quantile[1]        


# IQR
todat=na.omit(select(ps_dat,-outlier1_2sd))
dim(todat)   
names(todat)
set.seed(8968668)
perms <- genperms(todat$treatment, clustvar =todat$block) # all possible permutations

probs <- todat$weight_iv

ate=estate(Y = todat$outlier2_iqr, Z =todat$treatment,prob = probs)
Ys <- genouts(todat$outlier2_iqr,todat$treatment,ate=ate) 
distout <- gendist(Ys,perms,prob=probs) 
sampdist<-dispdist(distout,ate) 

out2<-ate
out2_se<-sampdist$sd
out2_ul<-sampdist$quantile[2]
out2_ll<-sampdist$quantile[1]    

######
## create a plot


ate<-c(eovharass, turnout,out1, out2 )
se<-c(eovharass_se, turnout_se,out1_se, out2_se )
ll<-c(eovharass_ll, turnout_ll, out1_ll, out2_ll )
ul<-c(eovharass_ul, turnout_ul,out1_ul, out2_ul )

dat<-rbind(ate,se,ll,ul)
colnames(dat)<-c("eovharass", "turnout","outlier (2SDs)", "outlier (IQR)")

step<-1:4

main_15<-c(dat[1,])
ll_15<-c(dat[3,])
ul_15<-c(dat[4,])

#-----------------
data_analysis <- data[data$turnout < 1,]

## Re- Generate outlier measures of fraud 
## outlier1 -- outlier defined as 1 if 2 SDs above mean in constituency, 0 otherwise
rob_dat<-select(data_analysis,const,turnout) #select constituency and turnout variables from main dataset
robdat_grp<-group_by(rob_dat,const) #group data by constituency to compute constituency level estimates
const_sum<-summarize(robdat_grp,const_to=mean(turnout, na.rm=T),sdv=sd(turnout,na.rm = T), iqr=IQR(turnout,na.rm = T), upqr=quantile(turnout, 0.75, na.rm = T)) #compute constituency level estimates
# Notes: const_to=Average turnout in constituency, sdv=Standard deviation of turnout in constituency, iqr=interquartile range of turnout, upqr=upper quartile of turnout distribution. 
const_sum<-mutate(const_sum,mean_ubto=const_to+1.96*sdv,med_ubto=upqr+1.5*iqr) # compute upper limits for turnout (i.e., 2 standard deviations away and upqr+1.5*iqr)
#Merge calculation into main dataset
data_analysis<-merge(data_analysis,const_sum,by="const", all.x = TRUE, all.y = FALSE)
data_analysis<-mutate(data_analysis,outlier1_2sd=ifelse(turnout>mean_ubto,1,0), outlier2_iqr=ifelse(turnout>med_ubto,1,0)) # generate alternate fraud measures based on turntout

# Select out the measures
ps_dat=select(data_analysis,block,weight_iv,treatment,turnout,eovharass)

#1. Turnout estimates
todat=na.omit(select(ps_dat,-eovharass))
dim(todat)   

set.seed(8968668)
perms <- genperms(todat$treatment, clustvar =todat$block) # all possible permutations

probs <- todat$weight_iv

ate=estate(Y = todat$turnout, Z =todat$treatment,prob = probs)
Ys <- genouts(todat$turnout,todat$treatment,ate=ate) 
distout <- gendist(Ys,perms,prob=probs) 
sampdist<-dispdist(distout,ate) 

turnout<-ate
turnout_se<-sampdist$sd
turnout_ul<-sampdist$quantile[2]
turnout_ll<-sampdist$quantile[1]            


###########
# Intimidation
pvdat=na.omit(select(ps_dat,-turnout))
dim(pvdat)

set.seed(857580)
perms <- genperms(pvdat$treatment, clustvar =pvdat$block) # all possible permutations

probs <- pvdat$weight_iv

ate=estate(Y = pvdat$eovharass, Z =pvdat$treatment,prob = probs)
Ys <- genouts(pvdat$eovharass,pvdat$treatment,ate=ate) 
distout <- gendist(Ys,perms,prob=probs) 
sampdist<-dispdist(distout,ate)  

eovharass<-ate
eovharass_se<-sampdist$sd
eovharass_ul<-sampdist$quantile[2]
eovharass_ll<-sampdist$quantile[1]

######################
## outliers
ps_dat=select(data_analysis,block,weight_iv,treatment,outlier1_2sd,outlier2_iqr)

# 2 SDS
todat=na.omit(select(ps_dat,-outlier2_iqr))
dim(todat)   

set.seed(8968668)
perms <- genperms(todat$treatment, clustvar =todat$block) # all possible permutations

probs <- todat$weight_iv

ate=estate(Y = todat$outlier1_2sd, Z =todat$treatment,prob = probs)
Ys <- genouts(todat$outlier1_2sd,todat$treatment,ate=ate) 
distout <- gendist(Ys,perms,prob=probs) 
sampdist<-dispdist(distout,ate) 

out1<-ate
out1_se<-sampdist$sd
out1_ul<-sampdist$quantile[2]
out1_ll<-sampdist$quantile[1]        


# IQR
todat=na.omit(select(ps_dat,-outlier1_2sd))
dim(todat)   
names(todat)
set.seed(8968668)
perms <- genperms(todat$treatment, clustvar =todat$block) # all possible permutations

probs <- todat$weight_iv

ate=estate(Y = todat$outlier2_iqr, Z =todat$treatment,prob = probs)
Ys <- genouts(todat$outlier2_iqr,todat$treatment,ate=ate) 
distout <- gendist(Ys,perms,prob=probs) 
sampdist<-dispdist(distout,ate) 

out2<-ate
out2_se<-sampdist$sd
out2_ul<-sampdist$quantile[2]
out2_ll<-sampdist$quantile[1]    

######
## create a plot


ate<-c(eovharass, turnout,out1, out2 )
se<-c(eovharass_se, turnout_se,out1_se, out2_se )
ll<-c(eovharass_ll, turnout_ll, out1_ll, out2_ll )
ul<-c(eovharass_ul, turnout_ul,out1_ul, out2_ul )

dat<-rbind(ate,se,ll,ul)
colnames(dat)<-c("eovharass", "turnout","outlier (2SDs)", "outlier (IQR)")

step<-1:4

main_1<-c(dat[1,])
ll_1<-c(dat[3,])
ul_1<-c(dat[4,])




par(mfrow=c(2, 2))

plot(step,main_3, axes = F,ylim=c(-0.15, 0.15),xlim=c(0.5,4.5), ylab="ATE", xlab="", pch=19, las=1, main="Dropping Turnout > 300 percent")
grid()
axis(at = c(1,2, 3, 4), labels = c("Intimidation", "Turnout", "Outlier (2SDs)","Outlier (IQR)" ), side = 1, cex.axis=0.9)
axis(side = 2,las=1)
segments(step,ll_3, step, ul_3)
abline(h=0,lty=2)

plot(step,main_2, axes = F,ylim=c(-0.15, 0.15),xlim=c(0.5,4.5), ylab="ATE", xlab="", pch=19, las=1, main="Dropping Turnout > 200 percent")
grid()
axis(at = c(1,2, 3, 4), labels = c("Intimidation", "Turnout", "Outlier (2SDs)","Outlier (IQR)" ), side = 1, cex.axis=0.9)
axis(side = 2,las=1)
segments(step,ll_2, step, ul_2)
abline(h=0,lty=2)

plot(step,main_15, axes = F,ylim=c(-0.15, 0.15),xlim=c(0.5,4.5), ylab="ATE", xlab="", pch=19, las=1, main="Dropping Turnout > 150 percent")
grid()
axis(at = c(1,2, 3, 4), labels = c("Intimidation", "Turnout", "Outlier (2SDs)","Outlier (IQR)" ), side = 1, cex.axis=0.9)
axis(side = 2,las=1)
segments(step,ll_15, step, ul_15)
abline(h=0,lty=2)

plot(step,main_1, axes = F,ylim=c(-0.15, 0.15),xlim=c(0.5,4.5), ylab="ATE", xlab="", pch=19, las=1, main="Dropping Turnout > 100 percent")
grid()
axis(at = c(1,2, 3, 4), labels = c("Intimidation", "Turnout", "Outlier (2SDs)","Outlier (IQR)" ), side = 1, cex.axis=0.9)
axis(side = 2,las=1)
segments(step,ll_1, step, ul_1)
abline(h=0,lty=2)